122 ◾ Bioinformatics
sh” and copy and save the following script on it and then execute the bash file as “bash
pipeline_bcftools.sh”:
#!/bin/bash
#Sars-Cov2 variant calling
#-------------------------
#1- download fastq files from the NCBI SRA database
mkdir fastq
while read f;
do
fasterq-dump --progress --outdir fastq “$f”
done < ids.txt
#2- download and extract the reference genome
mkdir ref
cd ref
wget https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/009/858/895/
GCF_009858895.2_ASM985889v3/GCF_009858895.2_ASM985889v3_genomic.
fna.gz
#Extract the compressed the reference FASTA file
f=$(ls *.*)
gzip -d ${f}
#3- Index the reference FASTA file using samtools and bwa
f=$(ls *.*)
samtools faidx ${f}
bwa index ${f}
cd ..
#4- Align the fastq reads (multiple samples) to the reference
genome
mkdir sam
cd fastq
for i in $(ls *.fastq | rev | cut -c 9- | rev | uniq);
do
bwa mem -M -t 4 \
-R “@RG\tID:${i}\tSM:${i}” \
../ref/GCF_009858895.2_ASM985889v3_genomic.fna \
${i}_1.fastq ${i}_2.fastq > \
../sam/${i}.sam 2> ../sam/${i}.log;
done
cd ..
#5- convert SAM files into BAM files
mkdir bam
cd sam
for i in $(ls *.sam | rev | cut -c 5- | rev);
do
samtools view -uS -o ../bam/${i}.bam ${i}.sam
done
cd ..